[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

JonPsson1 · 2025-04-18T08:55:01Z

This can be done directly with the (64-bit) target instruction as only the sign bit is changed.

llvmbot · 2025-04-18T08:55:25Z

@llvm/pr-subscribers-backend-systemz

Author: Jonas Paulsson (JonPsson1)

Changes

This can be done directly with the (64-bit) target instruction as only the sign bit is changed.

Full diff: https://github.com/llvm/llvm-project/pull/136286.diff

10 Files Affected:

(modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+2)
(modified) llvm/lib/Target/SystemZ/SystemZInstrFP.td (+6-2)
(modified) llvm/test/CodeGen/SystemZ/fp-abs-01.ll (+2-3)
(modified) llvm/test/CodeGen/SystemZ/fp-abs-02.ll (+12)
(modified) llvm/test/CodeGen/SystemZ/fp-abs-03.ll (+2-3)
(modified) llvm/test/CodeGen/SystemZ/fp-abs-04.ll (+3-7)
(modified) llvm/test/CodeGen/SystemZ/fp-mul-08.ll (+3-4)
(modified) llvm/test/CodeGen/SystemZ/fp-mul-10.ll (+10-11)
(modified) llvm/test/CodeGen/SystemZ/fp-neg-01.ll (+3-4)
(modified) llvm/test/CodeGen/SystemZ/fp-neg-02.ll (+3-4)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 75cd5a319557d..746e2b1a88a17 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
     setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal);
+    for (auto Op : {ISD::FNEG, ISD::FABS})
+      setOperationAction(Op, MVT::f16, Legal);
   }
 
   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7775f456bbdc1..12e99f7e5f47a 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -378,8 +378,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
 }
 // Generic form, which does not set CC.
 def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64,  FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+  def LPDFR_16 : UnaryRRE<"lpdfr", 0xB370, fabs, FP16,  FP16>;
   def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32,  FP32>;
+}
 
 // Negative absolute value (Load Negative).
 let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
@@ -389,8 +391,10 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
 }
 // Generic form, which does not set CC.
 def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64,  FP64>;
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1 in {
+  def LNDFR_16 : UnaryRRE<"lndfr", 0xB371, fnabs, FP16,  FP16>;
   def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32,  FP32>;
+}
 
 // Square root.
 let Uses = [FPC], mayRaiseFPException = 1 in {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
index 0cfdefe3bd61b..fe573f1e3587e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,10 +7,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl %r14, __extendhfsf2@PLT
+; CHECK:      # %bb.0:
 ; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fabs.f16(half %f)
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
index 4266a893e8a3b..752609ef6d00d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -3,6 +3,18 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+; Test f16.
+declare half @llvm.fabs.f16(half %f)
+define half @f0(half %f) {
+; CHECK-LABEL: f0:
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
+  %abs = call half @llvm.fabs.f16(half %f)
+  %res = fneg half %abs
+  ret half %res
+}
+
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
 define float @f1(float %f) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
index 29f2d06e75ff9..029ae2309cab9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-03.ll
@@ -6,10 +6,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl %r14, __extendhfsf2@PLT
+; CHECK:      # %bb.0:
 ; CHECK-NEXT: lpdfr %f0, %f0
-; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fabs.f16(half %f)
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
index afaf3f6d22ac8..fbb43b69371f3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-04.ll
@@ -6,13 +6,9 @@
 declare half @llvm.fabs.f16(half %f)
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK:      brasl   %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lpdfr   %f0, %f0
-; CHECK-NEXT: brasl   %r14, __truncsfhf2@PLT
-; CHECK-NEXT: brasl   %r14, __extendhfsf2@PLT
-; CHECK-NEXT: lcdfr   %f0, %f0
-; CHECK-NEXT: brasl   %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lndfr %f0, %f0
+; CHECK-NEXT: br %r14
   %abs = call half @llvm.fabs.f16(half %f)
   %res = fneg half %abs
   ret half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
index 2b18abec8d555..e739bddd4f18f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -8,13 +8,12 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
 
 define half @f0(half %f1, half %f2, half %acc) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f{{[0-9]+}}, %f4
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK-SCALAR: maebr %f0, %f9, %f8
+; CHECK-SCALAR: maebr %f0, %f8, %f10
 ; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
 ; CHECK: br %r14
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
index 1ecf52fbde354..8f2cd23112cd0 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-10.ll
@@ -30,10 +30,10 @@ define half @f3_half(half %f1, half %f2, half %acc) {
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK:      lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
   %res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc)
   %negres = fneg half %res
   ret half %negres
@@ -50,18 +50,17 @@ define float @f3(float %f1, float %f2, float %acc) {
 
 define half @f4_half(half %f1, half %f2, half %acc) {
 ; CHECK-LABEL: f4_half:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
+; CHECK-NOT: brasl
+; CHECK: lcdfr %f0, %f4
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: brasl %r14, __extendhfsf2@PLT
 ; CHECK: wfmasb %f0, %f0, %f8, %f10
 ; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK-NOT: brasl
+; CHECK:      lcdfr %f0, %f0
+; CHECK-NEXT: lmg
+; CHECK-NEXT: br %r14
   %negacc = fneg half %acc
   %res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc)
   %negres = fneg half %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index a8fe8d5da7c8a..0e19d9647178f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,10 +6,9 @@
 ; Test f16.
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
   %res = fneg half %f
   ret half %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
index 848c4740d8540..d0802878f8f8b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-02.ll
@@ -5,10 +5,9 @@
 ; Test f16.
 define half @f0(half %f) {
 ; CHECK-LABEL: f0:
-; CHECK: brasl %r14, __extendhfsf2@PLT
-; CHECK: lcdfr %f0, %f0
-; CHECK: brasl %r14, __truncsfhf2@PLT
-; CHECK: br %r14
+; CHECK:      # %bb.0:
+; CHECK-NEXT: lcdfr %f0, %f0
+; CHECK-NEXT: br %r14
   %res = fneg half %f
   ret half %res
 }

tgross35 · 2025-04-21T21:35:09Z

Was the ABI for f16 published anywhere public by the way? I'm trying to understand how l*dfr could work for both f32 and f16, I'm assuming they are passed in the same registers and aligned at the MSB.

uweigand · 2025-04-22T08:38:47Z

> Was the ABI for f16 published anywhere public by the way? I'm trying to understand how l*dfr could work for both f32 and f16, I'm assuming they are passed in the same registers and aligned at the MSB.

It's not yet published (we're planning on doing this soon), but you're indeed correct that they are passed in the same registers as f32 and f64, and aligned at the MSB. (That's why the same set of instructions already works for both f32 and f64.)

uweigand

LGTM, thanks!

uweigand · 2025-04-22T08:39:17Z

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

@@ -554,6 +554,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
    setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal);
+    for (auto Op : {ISD::FNEG, ISD::FABS})


Minor nit: maybe add FCOPYSIGN to the loop then?

Handle f16 load positive/negative/complement without libcalls.

7632f46

JonPsson1 added the backend:SystemZ label Apr 18, 2025

JonPsson1 requested a review from uweigand April 18, 2025 08:55

uweigand approved these changes Apr 22, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

JonPsson1 commented Apr 18, 2025

llvmbot commented Apr 18, 2025

tgross35 commented Apr 21, 2025

uweigand commented Apr 22, 2025

uweigand left a comment

uweigand Apr 22, 2025

[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

Are you sure you want to change the base?

[SystemZ] Handle f16 load positive/negative/complement without libcalls. #136286

Conversation

JonPsson1 commented Apr 18, 2025

llvmbot commented Apr 18, 2025

tgross35 commented Apr 21, 2025

uweigand commented Apr 22, 2025

uweigand left a comment

Choose a reason for hiding this comment

uweigand Apr 22, 2025

Choose a reason for hiding this comment